{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "# 图像卷积\n",
    "\n",
    "互相关运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "origin_pos": 3,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "from d2l import mindspore as d2l\n",
    "from mindspore import nn, ops, value_and_grad\n",
    "\n",
    "def corr2d(X, K):  \n",
    "    \"\"\"计算二维互相关运算。\"\"\"\n",
    "    h, w = K.shape\n",
    "    Y = ops.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
    "    for i in range(Y.shape[0]):\n",
    "        for j in range(Y.shape[1]):\n",
    "            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n",
    "    return Y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "验证上述二维互相关运算的输出"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "origin_pos": 6,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.061.088 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.061.167 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n",
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.066.353 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.066.405 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[2, 2], dtype=Float32, value=\n",
       "[[ 1.90000000e+01,  2.50000000e+01],\n",
       " [ 3.70000000e+01,  4.30000000e+01]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = d2l.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n",
    "K = d2l.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
    "corr2d(X, K)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "实现二维卷积层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "origin_pos": 9,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "class Conv2D(nn.Cell):\n",
    "    def __init__(self, kernel_size):\n",
    "        super().__init__()\n",
    "        self.weight = Parameter(d2l.normal((kernel_size), 0, 1))\n",
    "        self.bias = Parameter(ops.zeros(1))\n",
    "\n",
    "    def construct(self, x):\n",
    "        return corr2d(x, self.weight) + self.bias"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "卷积层的一个简单应用：\n",
    "检测图像中不同颜色的边缘"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "origin_pos": 12,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.103.859 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.103.952 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[6, 8], dtype=Float32, value=\n",
       "[[ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],\n",
       " [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],\n",
       " [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],\n",
       " [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],\n",
       " [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00],\n",
       " [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00,  1.00000000e+00,  1.00000000e+00]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = d2l.ones((6, 8))\n",
    "X[:, 2:6] = 0\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "origin_pos": 15,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [],
   "source": [
    "K = d2l.tensor([[1.0, -1.0]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "输出`Y`中的1代表从白色到黑色的边缘，-1代表从黑色到白色的边缘"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "origin_pos": 17,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.125.417 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.125.505 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n",
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.131.386 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.131.469 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[6, 7], dtype=Float32, value=\n",
       "[[ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  1.00000000e+00,  0.00000000e+00 ...  0.00000000e+00, -1.00000000e+00,  0.00000000e+00]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Y = corr2d(X, K)\n",
    "Y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "卷积核`K`只可以检测垂直边缘"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "origin_pos": 19,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.278.327 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.278.385 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n",
      "[WARNING] KERNEL(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.288.696 [mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc:96] ReducePrecision] Kernel [TensorScatterUpdate] does not support int64, cast input 1 to int32.\n",
      "[WARNING] PRE_ACT(4098363,7f10a7d9c740,python):2021-11-08-00:04:34.288.793 [mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.cc:83] Run] Reduce precision for [TensorScatterUpdate] input 1\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[8, 5], dtype=Float32, value=\n",
       "[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],\n",
       " ...\n",
       " [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],\n",
       " [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "corr2d(d2l.transpose(X), K)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "学习由`X`生成`Y`的卷积核"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "origin_pos": 22,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "batch 2, loss 0.272\n",
      "batch 4, loss 0.263\n",
      "batch 6, loss 0.254\n",
      "batch 8, loss 0.245\n",
      "batch 10, loss 0.237\n"
     ]
    }
   ],
   "source": [
    "conv2d = nn.Conv2d(1,1, kernel_size=(1, 2), has_bias=False, pad_mode='valid')\n",
    "\n",
    "X = X.reshape((1, 1, 6, 8))\n",
    "Y = Y.reshape((1, 1, 6, 7))\n",
    "lr = 3e-2\n",
    "\n",
    "loss_fn = nn.MSELoss()\n",
    "optim = nn.SGD(conv2d.trainable_params(), lr)\n",
    "\n",
    "# 定义前向传播函数\n",
    "def forward_fn(x, y):\n",
    "    z = conv2d(x)\n",
    "    loss = loss_fn(z, y).mean()\n",
    "    return loss\n",
    "\n",
    "# 获取梯度函数\n",
    "grad_fn = value_and_grad(forward_fn, None, weights=conv2d.trainable_params())\n",
    "\n",
    "# 定义模型单步训练\n",
    "def train(X, Y, optim):\n",
    "    loss, grads = grad_fn(X, Y)\n",
    "    loss = ops.depend(loss, optim(grads))\n",
    "    return loss\n",
    "    \n",
    "for i in range(10):\n",
    "    loss = train(X, Y, optim)\n",
    "    if (i + 1) % 2 == 0:\n",
    "        print(f'batch {i+1}, loss {loss.asnumpy():.3f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "所学的卷积核的权重张量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "origin_pos": 26,
    "tab": [
     "pytorch"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tensor(shape=[1, 2], dtype=Float32, value=\n",
       "[[ 1.00198783e-01, -9.42235067e-02]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d2l.reshape(conv2d.weight, (1, 2))"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  },
  "rise": {
   "autolaunch": true,
   "enable_chalkboard": true,
   "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
   "scroll": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
